package com.codejiwei.core.rdd

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates the `distinct` RDD operator with an explicit partition count.
 *
 * Builds an RDD of integers that contains duplicates, split over 2 partitions,
 * de-duplicates it with `distinct(3)`, and saves the result as text under `output`.
 */
object Spark_Operator_Distinct {

  /**
   * Program entry point: runs the distinct demo on a local, single-threaded master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[1]").setAppName("Distinct")
    val sc = new SparkContext(conf)
    try {
      val rdd = sc.makeRDD(List(1, 2, 3, 4, 5, 6, 1, 2, 1), 2)

      // distinct(n) lets the number of partitions be changed while de-duplicating.
      val rdd1 = rdd.distinct(3)

      rdd1.saveAsTextFile("output")
    } finally {
      // Stop the context on every exit path so a failing job does not leave it running.
      sc.stop()
    }
  }
}
